import pandas as pd
import numpy as np
from sklearn.metrics import cohen_kappa_score

# =========================
# Basic parameters
# =========================
file_path = "News Titles Sentiments_LLM and VADER.xlsx"
random_seed = 42
total_sample_size = 150

# Seeds NumPy's global RNG. The stratified sampling below passes
# random_state explicitly, so it does not rely on this call.
np.random.seed(random_seed)

# =========================
# Read every sheet of the workbook
# =========================
# The context manager closes the workbook handle once all sheets are loaded.
with pd.ExcelFile(file_path) as xls:
    years = xls.sheet_names
    # Tag every row with the name of the sheet it came from.
    all_data = [
        pd.read_excel(xls, sheet_name=year).assign(Year=year) for year in years
    ]

data = pd.concat(all_data, ignore_index=True)

# =========================
# (1) Stratified sample of rows where Grok == GPT
# =========================
labels_match = data["Grok"] == data["GPT"]
# NaN never compares equal to NaN, so a row for which neither model produced
# a label would otherwise be counted as a Grok/GPT disagreement.
both_unlabeled = data["Grok"].isna() & data["GPT"].isna()

agree_df = data[labels_match].copy()
agree_df["Sentiment"] = agree_df["Grok"]

# Equal quota per sentiment class. Because of the floor division and the
# per-class cap below, the final sample can be smaller than total_sample_size.
sentiment_counts = agree_df["Sentiment"].value_counts()
num_classes = sentiment_counts.shape[0]
# Guard against an empty agreement set (would divide by zero).
per_class_sample = total_sample_size // num_classes if num_classes else 0

sampled_list = [
    group.sample(n=min(len(group), per_class_sample), random_state=random_seed)
    for _, group in agree_df.groupby("Sentiment")
]

# pd.concat refuses an empty list; fall back to an empty frame with the
# same columns so the downstream export still works.
sampled_agree = (
    pd.concat(sampled_list, ignore_index=True)
    if sampled_list
    else agree_df.head(0).copy()
)

# =========================
# (2) Rows where Grok != GPT and Human is empty
# =========================
disagree_df = data[~labels_match & ~both_unlabeled].copy()

# A Human cell counts as blank when it is NaN or whitespace-only.
human_blank = disagree_df["Human"].isna() | (
    disagree_df["Human"].astype(str).str.strip() == ""
)
human_missing = disagree_df[human_blank]

# =========================
# (3) LLM accuracy on rows where Grok != GPT
# =========================
disagree_with_human = disagree_df[~human_blank].copy()

# A row is "correct" when at least one of the two models matches Human.
# NOTE(review): labels are compared verbatim, so stray whitespace or case
# differences in any of the three columns count as a mismatch - confirm the
# workbook is clean.
disagree_with_human["LLM_correct"] = (
    (disagree_with_human["Grok"] == disagree_with_human["Human"])
    | (disagree_with_human["GPT"] == disagree_with_human["Human"])
)

# Mean of a boolean column = share of correct rows (NaN when there are none).
llm_accuracy = disagree_with_human["LLM_correct"].mean()

# =========================
# （4）定义 LLM 的单一预测标签，供 Cohen's κ 使用
# =========================
def define_llm_label(row):
    """Collapse the Grok and GPT predictions of one row into a single label.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) with "Grok" and
            "GPT" entries; "Human" is read only when the two models differ.

    Returns:
        The shared label when both models agree; otherwise whichever model
        label equals "Human" (Grok is checked first); otherwise Grok's label.

    Note:
        When the models differ, the choice is made by looking at the Human
        label, so agreement statistics computed from the result reflect the
        best case "at least one model matched the annotator".
    """
    grok_label = row["Grok"]
    gpt_label = row["GPT"]

    # Grok wins when both models agree, and also when it matches the annotator.
    if grok_label == gpt_label or grok_label == row["Human"]:
        return grok_label

    # The models differ and Grok missed: use GPT only if it matches Human,
    # else fall back to Grok as the arbitrary default.
    return gpt_label if gpt_label == row["Human"] else grok_label

# Scored only on rows that have a Human label and where Grok != GPT.
if disagree_with_human.empty:
    # Nothing to score: a row-wise apply on an empty frame does not yield a
    # label Series, and Cohen's kappa is undefined for zero samples. Keep the
    # column so the exported sheet has a stable layout, and report NaN.
    disagree_with_human["LLM_Label"] = pd.Series(
        index=disagree_with_human.index, dtype=object
    )
    cohen_kappa = float("nan")
else:
    disagree_with_human["LLM_Label"] = disagree_with_human.apply(define_llm_label, axis=1)

    # Cohen's kappa between the derived LLM label and the Human label.
    cohen_kappa = cohen_kappa_score(disagree_with_human["LLM_Label"], disagree_with_human["Human"])

# =========================
# Write results
# =========================
output_path = "Sentiments_Analysis_Output.xlsx"

# One sheet per result table; the writer is saved and closed on exit.
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
    sampled_agree.to_excel(writer, sheet_name="Sampled_Grok_EQ_GPT", index=False)
    human_missing.to_excel(writer, sheet_name="Disagree_Human_Missing", index=False)
    disagree_with_human.to_excel(writer, sheet_name="Disagree_With_Human", index=False)

# Console summary (user-facing strings are kept verbatim).
print("抽样数量（Grok == GPT）：", len(sampled_agree))
print("Grok ≠ GPT 且 Human 缺失行数：", len(human_missing))
print("Grok ≠ GPT 情况下 Sentiments-LLM 正确率：", round(llm_accuracy, 4))
print("Grok ≠ GPT 情况下 LLM 与 Human 的 Cohen's κ：", round(cohen_kappa, 4))
print("结果已保存至：", output_path)
